# Packages and dependence
packageCheckClassic <- function(x){
  # 
  for( i in x ){
    if( ! require( i , character.only = TRUE ) ){
      install.packages( i , dependencies = TRUE )
      require( i , character.only = TRUE )
    }
  }
}

packageCheckClassic(c('gridExtra','DESeq2','adegenet','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: gridExtra
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
## 
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
## 
##     score
## The following object is masked from 'package:BiocGenerics':
## 
##     score
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.18'
##   is available with R version '4.3'; see https://bioconductor.org/install
## 
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
## 
##     install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
## 
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
## 
##     rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: vegan
## Loading required package: permute
## 
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
## 
##     check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
## 
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
## 
##     plotMA
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (69c36ce6) has not changed since last install.
##   Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('pairwiseAdonis')
## Loading required package: cluster
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Functions

trimFunction <- function(resLFC,p_adj_cut,lfc_cut) {
  resOrdered<-resLFC[order(resLFC$padj),]
  y<-na.omit(resOrdered)
  resTrim<-y[y$padj < p_adj_cut,]
  resTrim <- resTrim[ which( resTrim$log2FoldChange > lfc_cut | resTrim$log2FoldChange < -lfc_cut), ]
  resTrim <- as.data.frame(resTrim)
  resTrim$genes <- rownames(resTrim)
  return(resTrim)
}

applyAnnot <- function(inputDf,inputDfAnnot) {
  gene_and_annot_col <- character(0)
  inputDf_annot_genes <- inputDfAnnot$genes
  inputDf_annot_pfam <- inputDfAnnot$pfam_annotation
  for (i in 1:length(inputDf_annot_genes)) {
    gene_and_annot_col <- c(gene_and_annot_col, 
                            paste(inputDf_annot_genes[i], " - ", inputDf_annot_pfam[i]))
  }
  inputDf_genes <- inputDf$genes
  new_gene_annot_col <- character(0)
  for (i in inputDf_genes) {
    gene <- i
    for (j in gene_and_annot_col) {
      j_split <- strsplit(j, " ", fixed = TRUE)[[1]]
      if (i %in% j_split[1]) {
        gene <- j
      }
    }
    new_gene_annot_col <- c(new_gene_annot_col, gene)
  }
  inputDf$genes <- new_gene_annot_col
  return(inputDf)
}

heatmapFunction <- function(newColNames,commonGenes,commonGenesAll,vsd,nameCode) {
  vsdCommonGm <- vsd[commonGenes$genes,]
  vsdCommonGm@colData@rownames = newColNames
  annot_genes = commonGenesAll$genes
  genes = names(vsdCommonGm)
  
  new_names = list()
  for (i in annot_genes) {
    gene = i
    for (j in genes) {
      if (i == j) {
        gene = j
      }
    }
    new_names <- c(new_names,gene)
  }
  names(vsdCommonGm) <- new_names
  pheatmap(assay(vsdCommonGm),main=nameCode,scale="row", cluster_rows=T, show_rownames=T,
           cluster_cols=FALSE,cellwidth = 20,fontsize_row = 8)
  return(vsdCommonGm)
}

similarityFunction <- function(dataset1,dataset2) {
  c = 0
  for (i in rownames(dataset1)) {
    if (i %in% rownames(dataset2)) {
      c = c + 1
    }
  }
  sim = (c / as.integer(length(rownames(dataset2)))) * 100
  return(sim)
}

#### May 2018 ####

scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_may2018_inversed.txt',header=T)
samplesPV<-read.table('tximport_design_PV_may2018_inversed.txt',header=T)
samplesGM<-read.table('tximport_design_GM_may2018_inversed.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("may2018_sp_sp|may2018_gm_sp", colnames(raw_counts))] 
raw_counts_pv <- raw_counts[,grep("may2018_pv_pv|may2018_gm_pv", colnames(raw_counts))] 
raw_counts_gm <- raw_counts[,grep("may2018_gm_gm|may2018_sp_gm|may2018_pv_gm", colnames(raw_counts))] 
#samples_order_sp = c(5,6,7,8,9,10,11,12,1,2,3,4)
#samples_order_pv = c(6,7,8,9,10,11,12,13,14,1,2,3,4,5)
#samples_order_gm = c(1,2,3,4,5,6,7,8,14,15,16,17,18,9,10,11,12,13)
#raw_counts_sp <- raw_counts_sp[,samples_order_sp]
#raw_counts_pv <- raw_counts_pv[,samples_order_pv]
#raw_counts_gm <- raw_counts_gm[,samples_order_gm]

#### SP ####

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_sp_trt"
## [3,] "originSite_finalSite_experiment_sp_sp_tro_vs_gm_sp_trt"
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_tro"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_tro_lfc2_intra,0.05,1)

sp_sp_bck_VS_gm_sp_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_sp_trt"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_gm_sp_trt_lfc2_intra<-trimFunction(sp_sp_bck_VS_gm_sp_trt_lfc2_intra,0.05,1)

rownames1_intra = row.names(sp_sp_bck_VS_gm_sp_trt_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                          genes
## 1      STRG.17677  -  UPAR_LY6
## 2           STRG.2411  -  p450
## 3            STRG.6873  -  Ank
## 4                   STRG.40812
## 5       STRG.30087  -  DUF4795
## 6 STRG.40633  -  L51_S25_CI-B8
## 7      STRG.24627  -  Na_Ca_ex
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_may2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - SP") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)
## originSite_finalSite_experiment  2    12627 0.17386 1.0522  0.297
## Residual                        10    60003 0.82614              
## Total                           12    72631 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs   F.Model        R2 p.value p.adjusted
## 1 sp_sp_bck vs sp_sp_tro  1  5061.903 0.9833413 0.1408124   0.592      1.000
## 2 sp_sp_bck vs gm_sp_trt  1  6885.942 1.0566985 0.1497440   0.283      0.849
## 3 sp_sp_tro vs gm_sp_trt  1  6857.458 1.0967089 0.1205611   0.196      0.588
##   sig
## 1    
## 2    
## 3
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff       lwr       upr     p adj
## sp_sp_bck-gm_sp_trt -17.440163 -42.43355  7.553227 0.1853302
## sp_sp_tro-gm_sp_trt -10.375568 -32.02048 11.269343 0.4195501
## sp_sp_tro-sp_sp_bck   7.064595 -17.92879 32.057985 0.7260918
#### PV ####

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_pv_trt"
## [3,] "originSite_finalSite_experiment_pv_pv_tro_vs_gm_pv_trt"
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_tro"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_tro_lfc2_intra,0.05,1)

pv_pv_bck_VS_gm_pv_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_pv_trt"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_gm_pv_trt_lfc2_intra<-trimFunction(pv_pv_bck_VS_gm_pv_trt_lfc2_intra,0.05,1)

rownames1_intra = row.names(pv_pv_bck_VS_gm_pv_trt_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                            genes
## 1                     STRG.41006
## 2                     STRG.38928
## 3                     STRG.38500
## 4           STRG.28623  -  Sushi
## 5         STRG.19337  -  Pkinase
## 6                     STRG.12476
## 7             STRG.25239  -  EGF
## 8                     STRG.21464
## 9     STRG.37592  -  Aminotran_3
## 10                    STRG.23879
## 11        STRG.32899  -  zf-MYND
## 12          STRG.38652  -  Sushi
## 13                    STRG.31485
## 14          STRG.41361  -  I-set
## 15         STRG.1980  -  Pkinase
## 16                    STRG.39997
## 17             STRG.7292  -  fn3
## 18                    STRG.41855
## 19            STRG.20559  -  UPA
## 20 STRG.27513  -  Zona_pellucida
## 21                     STRG.2863
## 22                     STRG.8151
## 23                     STRG.2883
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_may2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - PV") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)
## originSite_finalSite_experiment  2    10187 0.15671 1.0221  0.372
## Residual                        11    54814 0.84329              
## Total                           13    65001 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs   F.Model        R2 p.value p.adjusted
## 1 pv_pv_bck vs pv_pv_tro  1  4941.366 0.9603022 0.1206364   0.563      1.000
## 2 pv_pv_bck vs gm_pv_trt  1  5368.479 1.1247097 0.1578604   0.228      0.684
## 3 pv_pv_tro vs gm_pv_trt  1  5017.357 1.0041443 0.1003728   0.464      1.000
##   sig
## 1    
## 2    
## 3
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff        lwr      upr     p adj
## pv_pv_bck-gm_pv_trt -4.109184 -21.894782 13.67641 0.8102582
## pv_pv_tro-gm_pv_trt  4.560327 -10.186712 19.30737 0.6899343
## pv_pv_tro-pv_pv_bck  8.669511  -8.551321 25.89034 0.3938340
#### GM ####

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_gm_gm_tro_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_gm_trt_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_sp_gm_trt_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_tro"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_tro_lfc2_intra,0.05,1)

gm_gm_bck_VS_sp_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_gm_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_sp_gm_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_sp_gm_trt_lfc2_intra,0.05,1)

gm_gm_bck_VS_pv_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_gm_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_pv_gm_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_pv_gm_trt_lfc2_intra,0.05,1)


rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_tro_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_sp_gm_trt_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_pv_gm_trt_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                             genes
## 1               STRG.3820  -  TNF
## 2      STRG.15249  -  DNA_pol_B_2
## 3                      STRG.19979
## 4              STRG.6534  -  DERM
## 5         STRG.33033  -  UPAR_LY6
## 6      STRG.34473  -  ATP-synt_ab
## 7              STRG.16889  -  EGF
## 8            STRG.24509  -  ATG27
## 9          STRG.19197  -  zf-TRAF
## 10          STRG.40723  -  Bax1-I
## 11         STRG.36683  -  CAP_GLY
## 12                     STRG.27219
## 13    STRG.12145  -  F5_F8_type_C
## 14         STRG.31012  -  Filamin
## 15                     STRG.32554
## 16  STRG.33979  -  Cytochrom_B561
## 17                     STRG.36363
## 18             STRG.35320  -  TNF
## 19                     STRG.26700
## 20   STRG.17139  -  FAD_binding_1
## 21                     STRG.36671
## 22     STRG.5029  -  Helicase_C_2
## 23                     STRG.21464
## 24             STRG.40720  -  COR
## 25         STRG.19337  -  Pkinase
## 26                     STRG.14152
## 27           STRG.32261  -  I-set
## 28             STRG.33364  -  SDF
## 29             STRG.39645  -  EGF
## 30                     STRG.41459
## 31            STRG.28658  -  LUC7
## 32            STRG.40373  -  cEGF
## 33           STRG.42968  -  DOMON
## 34        STRG.24860  -  Collagen
## 35             STRG.34745  -  fn3
## 36        STRG.3620  -  zf-CCHC_6
## 37                     STRG.24070
## 38                     STRG.30013
## 39          STRG.9452  -  CAP_GLY
## 40 STRG.40542  -  Phage_integrase
## 41         STRG.39050  -  DUF3990
## 42         STRG.39049  -  DUF3990
## 43                     STRG.12227
## 44                     STRG.14413
## 45   STRG.30824  -  Amino_oxidase
## 46      STRG.35964  -  Beta_helix
## 47                       STRG.955
## 48                     STRG.35123
## 49         STRG.12039  -  Pkinase
## 50              STRG.6873  -  Ank
## 51                     STRG.34377
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_may2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - GM") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)
## originSite_finalSite_experiment  3    13991 0.20697 1.1309  0.153
## Residual                        13    53609 0.79303              
## Total                           16    67600 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##                    pairs Df SumsOfSqs   F.Model        R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_tro  1  3550.331 1.0202444 0.1453289   0.444      1.000
## 2 gm_gm_bck vs sp_gm_trt  1  4915.474 1.2853949 0.2045050   0.218      1.000
## 3 gm_gm_bck vs pv_gm_trt  1  4025.316 0.9543117 0.1372259   0.595      1.000
## 4 gm_gm_tro vs sp_gm_trt  1  5824.720 1.4406988 0.1706848   0.101      0.606
## 5 gm_gm_tro vs pv_gm_trt  1  5112.247 1.1858412 0.1290945   0.168      1.000
## 6 sp_gm_trt vs pv_gm_trt  1  4365.598 0.9336818 0.1176858   0.584      1.000
##   sig
## 1    
## 2    
## 3    
## 4    
## 5    
## 6
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff         lwr       upr     p adj
## gm_gm_tro-gm_gm_bck 10.547822  -3.1222831 24.217926 0.1573278
## pv_gm_trt-gm_gm_bck 17.941110   4.2710049 31.611214 0.0094459
## sp_gm_trt-gm_gm_bck 13.399350  -0.8971875 27.695888 0.0692403
## pv_gm_trt-gm_gm_tro  7.393288  -4.4453699 19.231946 0.3024017
## sp_gm_trt-gm_gm_tro  2.851529  -9.7052644 15.408321 0.9077883
## sp_gm_trt-pv_gm_trt -4.541759 -17.0985524  8.015033 0.7176656
#### Sept2018 ####

scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_sept2018_inversed.txt',header=T)
samplesPV<-read.table('tximport_design_PV_sept2018_inversed.txt',header=T)
samplesGM<-read.table('tximport_design_GM_sept2018_inversed.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("sept2018_sp_sp|sept2018_gm_sp", colnames(raw_counts))] 
raw_counts_pv <- raw_counts[,grep("sept2018_pv_pv|sept2018_gm_pv", colnames(raw_counts))] 
raw_counts_gm <- raw_counts[,grep("sept2018_gm_gm|sept2018_sp_gm|sept2018_pv_gm", colnames(raw_counts))] 
#samples_order_sp = c(8,9,10,11,12,13,14,15,16,1,2,3,4,5,6,7)
#samples_order_pv = c(7,8,9,10,11,12,13,14,15,1,2,3,4,5,6)
#samples_order_gm = c(1,2,3,4,5,6,7,8,9,10,18,19,20,21,22,23,24,11,12,13,14,15,16,17)
#raw_counts_sp <- raw_counts_sp[,samples_order_sp]
#raw_counts_pv <- raw_counts_pv[,samples_order_pv]
#raw_counts_gm <- raw_counts_gm[,samples_order_gm]

#### SP ####

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 151 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_sp_gas"
## [3,] "originSite_finalSite_experiment_sp_sp_gas_vs_gm_sp_gas"
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_gas_lfc2_intra,0.05,1)

sp_sp_bck_VS_gm_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_sp_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_gm_sp_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_gm_sp_gas_lfc2_intra,0.05,1)

rownames1_intra = row.names(sp_sp_bck_VS_gm_sp_gas_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                           genes
## 1                     STRG.9055
## 2            STRG.34841  -  SEA
## 3            STRG.34850  -  BEN
## 4  STRG.13067  -  Peptidase_C14
## 5                    STRG.32612
## 6           STRG.34749  -  CHAT
## 7                     STRG.2331
## 8                    STRG.36150
## 9            STRG.25239  -  EGF
## 10 STRG.15785  -  HCO3_cotransp
## 11    STRG.35964  -  Beta_helix
## 12                   STRG.40146
## 13           STRG.27187  -  A2M
## 14                   STRG.32829
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_sept2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - SP") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
##                                 Df SumOfSqs     R2      F Pr(>F)
## originSite_finalSite_experiment  2    11772 0.1439 1.0926  0.271
## Residual                        13    70034 0.8561              
## Total                           15    81806 1.0000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1 sp_sp_bck vs sp_sp_gas  1  4546.240 0.8842399 0.11215284   0.643      1.000
## 2 sp_sp_bck vs gm_sp_gas  1  6251.666 1.1727958 0.12785587   0.249      0.747
## 3 sp_sp_gas vs gm_sp_gas  1  6531.885 1.1695675 0.09610592   0.216      0.648
##   sig
## 1    
## 2    
## 3
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff        lwr         upr     p adj
## sp_sp_bck-gm_sp_gas -15.703378 -31.369793 -0.03696242 0.0494400
## sp_sp_gas-gm_sp_gas  -1.768251 -14.398919 10.86241680 0.9278572
## sp_sp_gas-sp_sp_bck  13.935127  -2.118172 29.98842524 0.0926770
#### PV ####

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 142 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_pv_gas"
## [3,] "originSite_finalSite_experiment_pv_pv_gas_vs_gm_pv_gas"
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_gas_lfc2_intra,0.05,1)

pv_pv_bck_VS_gm_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_pv_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_gm_pv_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_gm_pv_gas_lfc2_intra,0.05,1)

rownames1_intra = row.names(pv_pv_bck_VS_gm_pv_gas_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                          genes
## 1      STRG.23232  -  zf-C3HC4
## 2                   STRG.39997
## 3                   STRG.22973
## 4       STRG.18537  -  BRICHOS
## 5           STRG.39645  -  EGF
## 6         STRG.42631  -  Bcl-2
## 7       STRG.34768  -  TMEM220
## 8                   STRG.19185
## 9                   STRG.41006
## 10       STRG.5456  -  DUF3987
## 11           STRG.6156  -  fn3
## 12    STRG.28262  -  Laminin_B
## 13                   STRG.4831
## 14                   STRG.3197
## 15  STRG.37728  -  Laminin_G_3
## 16           STRG.679  -  HECT
## 17       STRG.42790  -  Fringe
## 18 STRG.5258  -  Amino_oxidase
## 19         STRG.36828  -  PALP
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_sept2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - PV") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)  
## originSite_finalSite_experiment  2    13018 0.18276 1.4536  0.012 *
## Residual                        13    58214 0.81724                
## Total                           15    71232 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 pv_pv_bck vs pv_pv_gas  1  4059.201 0.906614 0.1146653   0.661      1.000    
## 2 pv_pv_bck vs gm_pv_gas  1  7468.260 1.683977 0.1738931   0.031      0.093    
## 3 pv_pv_gas vs gm_pv_gas  1  7467.692 1.655904 0.1308404   0.004      0.012   .
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff        lwr       upr     p adj
## pv_pv_bck-gm_pv_gas -8.2040678 -22.624231  6.216096 0.3218336
## pv_pv_gas-gm_pv_gas -0.2839956 -11.909903 11.341912 0.9977094
## pv_pv_gas-pv_pv_bck  7.9200722  -6.856198 22.696343 0.3620039
#### GM ####


# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 138 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_gm_gm_gas_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_gm_gas_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_sp_gm_gas_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_gas_lfc2_intra,0.05,1)

gm_gm_bck_VS_sp_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_sp_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_sp_gm_gas_lfc2_intra,0.05,1)

gm_gm_bck_VS_pv_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_pv_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_pv_gm_gas_lfc2_intra,0.05,1)

rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_gas_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_sp_gm_gas_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_pv_gm_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)

rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
##                             genes
## 1                      STRG.16155
## 2            STRG.31389  -  HSP70
## 3            STRG.27900  -  HSP70
## 4                      STRG.41130
## 5                       STRG.2331
## 6        STRG.30187  -  EF-hand_1
## 7                      STRG.14942
## 8            STRG.1286  -  Chromo
## 9               STRG.3820  -  TNF
## 10              STRG.7740  -  fn3
## 11            STRG.42069  -  SOUL
## 12          STRG.42790  -  Fringe
## 13        STRG.24860  -  Collagen
## 14                     STRG.22973
## 15             STRG.20559  -  UPA
## 16          STRG.15024  -  DAZAP2
## 17                      STRG.1287
## 18                     STRG.14152
## 19  STRG.29420  -  Ribosomal_L18A
## 20         STRG.41408  -  Pkinase
## 21   STRG.105  -  Ribosomal_L6e_N
## 22       STRG.17312  -  RT_RNaseH
## 23    STRG.24939  -  2OG-FeII_Oxy
## 24  STRG.22251  -  S-methyl_trans
## 25          STRG.24042  -  AAA_11
## 26              STRG.1170  -  CUB
## 27  STRG.13105  -  Cu2_monooxygen
## 28           STRG.24510  -  7tm_3
## 29         STRG.708  -  GDA1_CD39
## 30            STRG.30962  -  Mei4
## 31            STRG.15023  -  MoaC
## 32             STRG.22537  -  HLH
## 33                     STRG.17988
## 34                      STRG.2883
## 35             STRG.6534  -  DERM
## 36                       STRG.261
## 37        STRG.32553  -  UPAR_LY6
## 38                      STRG.2113
## 39        STRG.34588  -  Collagen
## 40                     STRG.41850
## 41           STRG.23759  -  MIEAP
## 42            STRG.40373  -  cEGF
## 43              STRG.8640  -  EGF
## 44           STRG.42227  -  RVT_1
## 45    STRG.12613  -  Fibrinogen_C
## 46        STRG.27038  -  Forkhead
## 47                     STRG.11433
## 48           STRG.29417  -  NACHT
## 49           STRG.37064  -  HSP70
## 50                     STRG.18099
## 51             STRG.36750  -  VWA
## 52             STRG.119  -  MIEAP
## 53                     STRG.24933
## 54          STRG.12709  -  CBM_14
## 55   STRG.16167  -  An_peroxidase
## 56   STRG.29418  -  Acyl-CoA_dh_1
## 57           STRG.42225  -  MIEAP
## 58           STRG.16136  -  RVT_1
## 59                     STRG.30013
## 60             STRG.3398  -  cEGF
## 61           STRG.37065  -  MIEAP
## 62                     STRG.40148
## 63           STRG.12662  -  EGF_3
## 64           STRG.40142  -  HSP70
## 65           STRG.42230  -  HSP70
## 66  STRG.40093  -  Cu2_monooxygen
## 67    STRG.40094  -  Cu2_monoox_C
## 68             STRG.1714  -  Leg1
## 69             STRG.18100  -  KDZ
## 70           STRG.40147  -  MIEAP
## 71 STRG.40647  -  Alk_phosphatase
## 72                     STRG.13719
## 73            STRG.5475  -  I-set
## 74           STRG.41593  -  MAPEG
## 75      STRG.35964  -  Beta_helix
## 76          STRG.4914  -  Astacin
## 77           STRG.42229  -  MIEAP
## 78         STRG.27801  -  Astacin
## 79             STRG.22986  -  EGF
## 80       STRG.28262  -  Laminin_B
## 81 STRG.27870  -  Phospholip_A2_3
## 82                     STRG.18096
## 83            STRG.23259  -  SRCR
## 84        STRG.4770  -  Anoctamin
## 85                     STRG.17793
## 86           STRG.20517  -  TSP_1
## 87    STRG.2780  -  Amino_oxidase
## 88    STRG.2781  -  Amino_oxidase
## 89                     STRG.26149
## 90            STRG.14937  -  CxC3
## 91    STRG.17200  -  F5_F8_type_C
## 92         STRG.32403  -  Filamin
## 93           STRG.37066  -  MIEAP
## 94         STRG.15029  -  DUF1647
## 95             STRG.3466  -  DEAD
## 96                      STRG.2402
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment

vsd_common_gm_lfc2_intra_sept2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - GM") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
##                                 Df SumOfSqs      R2    F Pr(>F)    
## originSite_finalSite_experiment  3    43790 0.23448 1.94  0.001 ***
## Residual                        19   142960 0.76552                
## Total                           22   186750 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 gm_gm_bck vs gm_gm_gas  1  10658.42 1.330685 0.1426139   0.067      0.402    
## 2 gm_gm_bck vs sp_gm_gas  1  19840.62 2.521545 0.2396554   0.008      0.048   .
## 3 gm_gm_bck vs pv_gm_gas  1  14656.40 1.691785 0.1946419   0.054      0.324    
## 4 gm_gm_gas vs sp_gm_gas  1  17254.23 2.515277 0.1732848   0.001      0.006   *
## 5 gm_gm_gas vs pv_gm_gas  1  13144.63 1.807099 0.1411014   0.002      0.012   .
## 6 sp_gm_gas vs pv_gm_gas  1  12141.07 1.693049 0.1333839   0.001      0.006   *
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff       lwr      upr     p adj
## gm_gm_gas-gm_gm_bck -7.442902 -31.42738 16.54157 0.8188379
## pv_gm_gas-gm_gm_bck -5.248574 -29.82535 19.32820 0.9306434
## sp_gm_gas-gm_gm_bck -8.477402 -32.46187 15.50707 0.7546381
## pv_gm_gas-gm_gm_gas  2.194328 -17.14257 21.53123 0.9883982
## sp_gm_gas-gm_gm_gas -1.034500 -19.61279 17.54379 0.9985829
## sp_gm_gas-pv_gm_gas -3.228828 -22.56573 16.10807 0.9648532